1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 package build.tools.generatecharacter;
27
28 import java.io.BufferedReader;
29 import java.io.FileReader;
30 import java.io.FileNotFoundException;
31 import java.io.IOException;
32 import java.util.StringTokenizer;
33 import java.io.File;
34 import java.util.regex.Pattern;
35 import java.util.ArrayList;
36
37
38
39
40
41
42
43
44
45
46
/**
 * One record of a semicolon-separated Unicode character specification file
 * (presumably the Unicode Character Database's {@code UnicodeData.txt};
 * the field layout below matches it, but confirm against the caller).
 *
 * <p>Each record has {@link #REQUIRED_FIELDS} fields, addressed by the
 * {@code FIELD_*} indices. Use {@link #parse(String)} to build an instance
 * from one line and {@link #readSpecFile(File, int)} to load every record of
 * a given plane from a file.
 */
public class UnicodeSpec {

    /** Marker stored in a case mapping field when the record defines no mapping. */
    private static final int MAP_UNDEFINED = 0xFFFFFFFF;

    /**
     * Creates a record for code point {@code 0xffff} with every property
     * set to its "undefined" default.
     */
    public UnicodeSpec() {
        this(0xffff);
    }

    /**
     * Creates a record for the given code point with every property set to
     * its "undefined" default: unassigned category, undefined directionality,
     * not mirrored, no case mappings, no decimal/digit value and an empty
     * numeric value.
     *
     * @param codePoint the code point this record describes
     */
    public UnicodeSpec(int codePoint) {
        this.codePoint = codePoint;
        generalCategory = UNASSIGNED;
        bidiCategory = DIRECTIONALITY_UNDEFINED;
        mirrored = false;
        titleMap = MAP_UNDEFINED;
        upperMap = MAP_UNDEFINED;
        lowerMap = MAP_UNDEFINED;
        decimalValue = -1;
        digitValue = -1;
        numericValue = "";
        oldName = null;
        comment = null;
        name = null;
    }

    /**
     * Returns the code point as six hex digits, followed by
     * {@code ", upper=…"}, {@code ", lower=…"} and {@code ", title=…"} for
     * each case mapping that is defined.
     *
     * @return a short human-readable description of this record
     */
    @Override
    public String toString() {
        StringBuilder result = new StringBuilder(hex6(codePoint));
        if (hasUpperMap()) {
            result.append(", upper=").append(hex6(upperMap));
        }
        if (hasLowerMap()) {
            result.append(", lower=").append(hex6(lowerMap));
        }
        if (hasTitleMap()) {
            result.append(", title=").append(hex6(titleMap));
        }
        return result.toString();
    }

    /**
     * Formats the low 16 bits of {@code n} as exactly four upper-case hex digits.
     *
     * @param n the value to format; bits above the low 16 are discarded
     * @return a four-character, zero-padded, upper-case hex string
     */
    static String hex4(int n) {
        return zeroPad(Integer.toHexString(n & 0xFFFF).toUpperCase(), 4);
    }

    /**
     * Formats the low 24 bits of {@code n} as exactly six upper-case hex digits.
     *
     * @param n the value to format; bits above the low 24 are discarded
     * @return a six-character, zero-padded, upper-case hex string
     */
    static String hex6(int n) {
        return zeroPad(Integer.toHexString(n & 0xFFFFFF).toUpperCase(), 6);
    }

    /** Left-pads {@code digits} with '0' characters up to {@code width}. */
    private static String zeroPad(String digits, int width) {
        StringBuilder padded = new StringBuilder(width);
        for (int i = digits.length(); i < width; i++) {
            padded.append('0');
        }
        return padded.append(digits).toString();
    }

    /**
     * Parses one semicolon-separated specification line.
     *
     * <p>Any failure (null input, too few fields, an unparsable field) is
     * reported as a single line on {@code System.out} and yields {@code null};
     * no exception escapes this method.
     *
     * @param s the line to parse
     * @return the parsed record, or {@code null} if the line could not be parsed
     */
    public static UnicodeSpec parse(String s) {
        try {
            // A positive limit keeps trailing empty fields, so a well-formed
            // line always yields exactly REQUIRED_FIELDS tokens.
            String[] tokens = tokenSeparator.split(s, REQUIRED_FIELDS);
            UnicodeSpec spec = new UnicodeSpec();
            spec.setCodePoint(parseCodePoint(tokens[FIELD_VALUE]));
            spec.setName(parseName(tokens[FIELD_NAME]));
            spec.setGeneralCategory(parseGeneralCategory(tokens[FIELD_CATEGORY]));
            spec.setBidiCategory(parseBidiCategory(tokens[FIELD_BIDI]));
            spec.setCombiningClass(parseCombiningClass(tokens[FIELD_CLASS]));
            spec.setDecomposition(parseDecomposition(tokens[FIELD_DECOMPOSITION]));
            spec.setDecimalValue(parseDecimalValue(tokens[FIELD_DECIMAL]));
            spec.setDigitValue(parseDigitValue(tokens[FIELD_DIGIT]));
            spec.setNumericValue(parseNumericValue(tokens[FIELD_NUMERIC]));
            spec.setMirrored(parseMirrored(tokens[FIELD_MIRRORED]));
            spec.setOldName(parseOldName(tokens[FIELD_OLDNAME]));
            spec.setComment(parseComment(tokens[FIELD_COMMENT]));
            spec.setUpperMap(parseUpperMap(tokens[FIELD_UPPERCASE]));
            spec.setLowerMap(parseLowerMap(tokens[FIELD_LOWERCASE]));
            spec.setTitleMap(parseTitleMap(tokens[FIELD_TITLECASE]));
            return spec;
        } catch (Exception e) {
            // Deliberately broad: a short line surfaces as
            // ArrayIndexOutOfBoundsException, a null line as
            // NullPointerException, a bad number as NumberFormatException.
            System.out.println("Error parsing spec line.");
            return null;
        }
    }

    /**
     * Parses the code point field as a hexadecimal integer.
     *
     * @param s the field text
     * @return the code point value
     * @throws NumberFormatException if {@code s} is not a hexadecimal integer
     */
    public static int parseCodePoint(String s) throws NumberFormatException {
        return Integer.parseInt(s, 16);
    }

    /**
     * Validates the name field.
     *
     * @param s the field text
     * @return {@code s} unchanged
     * @throws Exception if {@code s} is {@code null}
     */
    public static String parseName(String s) throws Exception {
        if (s == null) {
            throw new Exception("Cannot parse name.");
        }
        return s;
    }

    /**
     * Maps a two-letter general category abbreviation to its numeric constant
     * by looking it up in the {@link #SHORT} column of
     * {@link #generalCategoryList}.
     *
     * @param s the abbreviation, e.g. {@code "Lu"}
     * @return the matching general category constant
     * @throws Exception if {@code s} is {@code null} or not a known abbreviation
     */
    public static byte parseGeneralCategory(String s) throws Exception {
        for (byte x = 0; x < generalCategoryList.length; x++) {
            // Table entry on the left so a null argument falls through to the
            // descriptive exception below instead of a NullPointerException.
            if (generalCategoryList[x][SHORT].equals(s)) {
                return x;
            }
        }
        throw new Exception("Could not parse general category.");
    }

    /**
     * Maps a bidirectional category abbreviation to its numeric constant by
     * looking it up in the {@link #SHORT} column of {@link #bidiCategoryList}.
     *
     * @param s the abbreviation, e.g. {@code "L"} or {@code "NSM"}
     * @return the matching {@code DIRECTIONALITY_*} constant
     * @throws Exception if {@code s} is {@code null} or not a known abbreviation
     */
    public static byte parseBidiCategory(String s) throws Exception {
        for (byte x = 0; x < bidiCategoryList.length; x++) {
            if (bidiCategoryList[x][SHORT].equals(s)) {
                return x;
            }
        }
        throw new Exception("Could not parse bidi category.");
    }

    /**
     * Parses the combining class field as a decimal integer.
     *
     * @param s the field text
     * @return the combining class, or {@code -1} if the field is empty
     * @throws Exception if {@code s} is non-empty and not a decimal integer
     */
    public static int parseCombiningClass(String s) throws Exception {
        return parseOptionalDecimal(s);
    }

    /**
     * Validates the decomposition field; the text is kept verbatim.
     *
     * @param s the field text
     * @return {@code s} unchanged
     * @throws Exception if {@code s} is {@code null}
     */
    public static String parseDecomposition(String s) throws Exception {
        if (s == null) {
            throw new Exception("Cannot parse decomposition.");
        }
        return s;
    }

    /**
     * Parses the decimal digit value field as a decimal integer.
     *
     * @param s the field text
     * @return the decimal value, or {@code -1} if the field is empty
     * @throws NumberFormatException if {@code s} is non-empty and not a decimal integer
     */
    public static int parseDecimalValue(String s) throws NumberFormatException {
        return parseOptionalDecimal(s);
    }

    /**
     * Parses the digit value field as a decimal integer.
     *
     * @param s the field text
     * @return the digit value, or {@code -1} if the field is empty
     * @throws NumberFormatException if {@code s} is non-empty and not a decimal integer
     */
    public static int parseDigitValue(String s) throws NumberFormatException {
        return parseOptionalDecimal(s);
    }

    /** Returns -1 for an empty field, otherwise the field parsed as a base-10 integer. */
    private static int parseOptionalDecimal(String s) {
        return s.length() > 0 ? Integer.parseInt(s, 10) : -1;
    }

    /**
     * Validates the numeric value field; the text is kept verbatim
     * (it is not converted to a number here).
     *
     * @param s the field text
     * @return {@code s} unchanged
     * @throws Exception if {@code s} is {@code null}
     */
    public static String parseNumericValue(String s) throws Exception {
        if (s == null) {
            throw new Exception("Cannot parse numeric value.");
        }
        return s;
    }

    /**
     * Validates the comment field.
     *
     * @param s the field text
     * @return {@code s} unchanged
     * @throws Exception if {@code s} is {@code null}
     */
    public static String parseComment(String s) throws Exception {
        if (s == null) {
            throw new Exception("Cannot parse comment.");
        }
        return s;
    }

    /**
     * Parses the mirrored flag.
     *
     * @param s the field text; must be exactly {@code "Y"} or {@code "N"}
     * @return {@code true} for {@code "Y"}, {@code false} for {@code "N"}
     * @throws Exception for any other value, including {@code null}
     */
    public static boolean parseMirrored(String s) throws Exception {
        if ("Y".equals(s)) {
            return true;
        }
        if ("N".equals(s)) {
            return false;
        }
        throw new Exception("Cannot parse mirrored property.");
    }

    /**
     * Validates the old name field.
     *
     * @param s the field text
     * @return {@code s} unchanged
     * @throws Exception if {@code s} is {@code null}
     */
    public static String parseOldName(String s) throws Exception {
        if (s == null) {
            throw new Exception("Cannot parse old name");
        }
        return s;
    }

    /**
     * Parses the upper-case mapping field.
     *
     * @param s the field text: empty, or 4 to 6 hexadecimal digits
     * @return the mapped code point, or the "undefined" marker
     *         (see {@link #hasUpperMap()}) if the field is empty
     * @throws NumberFormatException if {@code s} is non-empty and is not
     *         4 to 6 hexadecimal digits
     */
    public static int parseUpperMap(String s) throws NumberFormatException {
        return parseCaseMap(s);
    }

    /**
     * Parses the lower-case mapping field.
     *
     * @param s the field text: empty, or 4 to 6 hexadecimal digits
     * @return the mapped code point, or the "undefined" marker
     *         (see {@link #hasLowerMap()}) if the field is empty
     * @throws NumberFormatException if {@code s} is non-empty and is not
     *         4 to 6 hexadecimal digits
     */
    public static int parseLowerMap(String s) throws NumberFormatException {
        return parseCaseMap(s);
    }

    /**
     * Parses the title-case mapping field.
     *
     * @param s the field text: empty, or 4 to 6 hexadecimal digits
     * @return the mapped code point, or the "undefined" marker
     *         (see {@link #hasTitleMap()}) if the field is empty
     * @throws NumberFormatException if {@code s} is non-empty and is not
     *         4 to 6 hexadecimal digits
     */
    public static int parseTitleMap(String s) throws NumberFormatException {
        return parseCaseMap(s);
    }

    /** Shared rule for the three case mapping fields: empty, or 4-6 hex digits. */
    private static int parseCaseMap(String s) {
        int length = s.length();
        if (length == 0) {
            return MAP_UNDEFINED;
        }
        if (length < 4 || length > 6) {
            throw new NumberFormatException(
                    "Case mapping must be 4 to 6 hex digits: \"" + s + "\"");
        }
        return Integer.parseInt(s, 16);
    }

    /**
     * Reads every record belonging to one plane from a specification file.
     *
     * <p>The plane of a record is its code point shifted right by 16 bits.
     * Records of lower planes are skipped and reading stops at the first
     * record of a higher plane, so the file is expected to be sorted by code
     * point. Lines that {@link #parse(String)} rejects are skipped.
     *
     * <p>If reading fails part-way through, the error is reported on
     * {@code System.err} and the records read so far are returned.
     *
     * @param file  the specification file to read
     * @param plane the plane whose records should be returned
     * @return the parsed records of {@code plane}, in file order; never {@code null}
     * @throws FileNotFoundException if {@code file} cannot be opened for reading
     */
    public static UnicodeSpec[] readSpecFile(File file, int plane) throws FileNotFoundException {
        ArrayList<UnicodeSpec> list = new ArrayList<UnicodeSpec>(3000);
        BufferedReader reader = new BufferedReader(new FileReader(file));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                UnicodeSpec item = parse(line.trim());
                if (item == null) {
                    // parse() has already reported the bad line; skip it
                    // rather than dereferencing null below.
                    continue;
                }
                int specPlane = item.getCodePoint() >>> 16;
                if (specPlane < plane) {
                    continue;
                }
                if (specPlane > plane) {
                    break;
                }
                list.add(item);
            }
        } catch (IOException e) {
            // Best effort, as before: keep what was read, but say so.
            System.err.println("Error reading spec file: " + e);
        } finally {
            try {
                reader.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing a read-only stream fails.
            }
        }
        return list.toArray(new UnicodeSpec[list.size()]);
    }

    /** Sets the code point this record describes. */
    void setCodePoint(int value) {
        codePoint = value;
    }

    /** @return the code point this record describes */
    public int getCodePoint() {
        return codePoint;
    }

    /** Sets the character name. */
    void setName(String name) {
        this.name = name;
    }

    /** @return the character name, or {@code null} if none was set */
    public String getName() {
        return name;
    }

    /** Sets the general category constant. */
    void setGeneralCategory(byte category) {
        generalCategory = category;
    }

    /** @return the general category constant, e.g. {@link #UPPERCASE_LETTER} */
    public byte getGeneralCategory() {
        return generalCategory;
    }

    /** Sets the bidirectional category constant. */
    void setBidiCategory(byte category) {
        bidiCategory = category;
    }

    /** @return the {@code DIRECTIONALITY_*} constant of this record */
    public byte getBidiCategory() {
        return bidiCategory;
    }

    /** Sets the combining class. */
    void setCombiningClass(int combiningClass) {
        this.combiningClass = combiningClass;
    }

    /** @return the combining class; {@code -1} if the parsed field was empty */
    public int getCombiningClass() {
        return combiningClass;
    }

    /** Sets the raw decomposition text. */
    void setDecomposition(String decomposition) {
        this.decomposition = decomposition;
    }

    /** @return the raw decomposition text, or {@code null} if none was set */
    public String getDecomposition() {
        return decomposition;
    }

    /** Sets the decimal digit value ({@code -1} means "none"). */
    void setDecimalValue(int value) {
        decimalValue = value;
    }

    /** @return the decimal digit value, or {@code -1} if there is none */
    public int getDecimalValue() {
        return decimalValue;
    }

    /** @return {@code true} if a decimal digit value is defined */
    public boolean isDecimalValue() {
        return decimalValue != -1;
    }

    /** Sets the digit value ({@code -1} means "none"). */
    void setDigitValue(int value) {
        digitValue = value;
    }

    /** @return the digit value, or {@code -1} if there is none */
    public int getDigitValue() {
        return digitValue;
    }

    /** @return {@code true} if a digit value is defined */
    public boolean isDigitValue() {
        return digitValue != -1;
    }

    /** Sets the raw numeric value text (empty means "none"). */
    void setNumericValue(String value) {
        numericValue = value;
    }

    /** @return the raw numeric value text; empty if there is none */
    public String getNumericValue() {
        return numericValue;
    }

    /** @return {@code true} if the numeric value text is non-null and non-empty */
    public boolean isNumericValue() {
        return numericValue != null && numericValue.length() > 0;
    }

    /** Sets the mirrored flag. */
    void setMirrored(boolean value) {
        mirrored = value;
    }

    /** @return the mirrored flag */
    public boolean isMirrored() {
        return mirrored;
    }

    /** Sets the old name. */
    void setOldName(String name) {
        oldName = name;
    }

    /** @return the old name, or {@code null} if none was set */
    public String getOldName() {
        return oldName;
    }

    /** Sets the comment. */
    void setComment(String comment) {
        this.comment = comment;
    }

    /** @return the comment, or {@code null} if none was set */
    public String getComment() {
        return comment;
    }

    /** Sets the upper-case mapping. */
    void setUpperMap(int ch) {
        upperMap = ch;
    }

    /** @return the upper-case mapping; meaningful only if {@link #hasUpperMap()} */
    public int getUpperMap() {
        return upperMap;
    }

    /** @return {@code true} if an upper-case mapping is defined */
    public boolean hasUpperMap() {
        return upperMap != MAP_UNDEFINED;
    }

    /** Sets the lower-case mapping. */
    void setLowerMap(int ch) {
        lowerMap = ch;
    }

    /** @return the lower-case mapping; meaningful only if {@link #hasLowerMap()} */
    public int getLowerMap() {
        return lowerMap;
    }

    /** @return {@code true} if a lower-case mapping is defined */
    public boolean hasLowerMap() {
        return lowerMap != MAP_UNDEFINED;
    }

    /** Sets the title-case mapping. */
    void setTitleMap(int ch) {
        titleMap = ch;
    }

    /** @return the title-case mapping; meaningful only if {@link #hasTitleMap()} */
    public int getTitleMap() {
        return titleMap;
    }

    /** @return {@code true} if a title-case mapping is defined */
    public boolean hasTitleMap() {
        return titleMap != MAP_UNDEFINED;
    }

    // Record state, one field per column of the specification line.
    // Kept package-visible, as in the original, for other classes in this package.
    int codePoint;
    String name;
    byte generalCategory;
    byte bidiCategory;
    int combiningClass;
    String decomposition;
    int decimalValue;      // -1 when absent
    int digitValue;        // -1 when absent
    String numericValue;   // "" when absent
    boolean mirrored;
    String oldName;
    String comment;
    int upperMap;          // MAP_UNDEFINED when absent
    int lowerMap;          // MAP_UNDEFINED when absent
    int titleMap;          // MAP_UNDEFINED when absent

    /** Number of semicolon-separated fields a specification line is split into. */
    static final int REQUIRED_FIELDS = 15;

    /**
     * General category constants. Each value is also the row index of the
     * category in {@link #generalCategoryList}. Value 17 is intentionally
     * not assigned to any constant (its table row is a placeholder).
     */
    public static final byte
        UNASSIGNED                  =  0,
        UPPERCASE_LETTER            =  1,
        LOWERCASE_LETTER            =  2,
        TITLECASE_LETTER            =  3,
        MODIFIER_LETTER             =  4,
        OTHER_LETTER                =  5,
        NON_SPACING_MARK            =  6,
        ENCLOSING_MARK              =  7,
        COMBINING_SPACING_MARK      =  8,
        DECIMAL_DIGIT_NUMBER        =  9,
        LETTER_NUMBER               = 10,
        OTHER_NUMBER                = 11,
        SPACE_SEPARATOR             = 12,
        LINE_SEPARATOR              = 13,
        PARAGRAPH_SEPARATOR         = 14,
        CONTROL                     = 15,
        FORMAT                      = 16,
        // 17 is unused
        PRIVATE_USE                 = 18,
        SURROGATE                   = 19,
        DASH_PUNCTUATION            = 20,
        START_PUNCTUATION           = 21,
        END_PUNCTUATION             = 22,
        CONNECTOR_PUNCTUATION       = 23,
        OTHER_PUNCTUATION           = 24,
        MATH_SYMBOL                 = 25,
        CURRENCY_SYMBOL             = 26,
        MODIFIER_SYMBOL             = 27,
        OTHER_SYMBOL                = 28,
        INITIAL_QUOTE_PUNCTUATION   = 29,
        FINAL_QUOTE_PUNCTUATION     = 30,
        // One past the last valid category; also the table length.
        GENERAL_CATEGORY_COUNT      = 31;

    /** Column indices into {@link #generalCategoryList} and {@link #bidiCategoryList}. */
    static final byte SHORT = 0, LONG = 1;

    /**
     * General category table: row index is the category constant,
     * column {@link #SHORT} is the abbreviation found in the spec file and
     * column {@link #LONG} is the constant's name.
     */
    static final String[][] generalCategoryList = {
        {"Cn", "UNASSIGNED"},
        {"Lu", "UPPERCASE_LETTER"},
        {"Ll", "LOWERCASE_LETTER"},
        {"Lt", "TITLECASE_LETTER"},
        {"Lm", "MODIFIER_LETTER"},
        {"Lo", "OTHER_LETTER"},
        {"Mn", "NON_SPACING_MARK"},
        {"Me", "ENCLOSING_MARK"},
        {"Mc", "COMBINING_SPACING_MARK"},
        {"Nd", "DECIMAL_DIGIT_NUMBER"},
        {"Nl", "LETTER_NUMBER"},
        {"No", "OTHER_NUMBER"},
        {"Zs", "SPACE_SEPARATOR"},
        {"Zl", "LINE_SEPARATOR"},
        {"Zp", "PARAGRAPH_SEPARATOR"},
        {"Cc", "CONTROL"},
        {"Cf", "FORMAT"},
        {"xx", "unused"},           // placeholder keeping row index == constant value
        {"Co", "PRIVATE_USE"},
        {"Cs", "SURROGATE"},
        {"Pd", "DASH_PUNCTUATION"},
        {"Ps", "START_PUNCTUATION"},
        {"Pe", "END_PUNCTUATION"},
        {"Pc", "CONNECTOR_PUNCTUATION"},
        {"Po", "OTHER_PUNCTUATION"},
        {"Sm", "MATH_SYMBOL"},
        {"Sc", "CURRENCY_SYMBOL"},
        {"Sk", "MODIFIER_SYMBOL"},
        {"So", "OTHER_SYMBOL"},
        {"Pi", "INITIAL_QUOTE_PUNCTUATION"},
        {"Pf", "FINAL_QUOTE_PUNCTUATION"}
    };

    /**
     * Bidirectional category constants. Each non-negative value is also the
     * row index of the category in {@link #bidiCategoryList}.
     */
    public static final byte
        DIRECTIONALITY_UNDEFINED                  = -1,

        DIRECTIONALITY_LEFT_TO_RIGHT              =  0,
        DIRECTIONALITY_RIGHT_TO_LEFT              =  1,
        DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC       =  2,

        DIRECTIONALITY_EUROPEAN_NUMBER            =  3,
        DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR  =  4,
        DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR =  5,
        DIRECTIONALITY_ARABIC_NUMBER              =  6,
        DIRECTIONALITY_COMMON_NUMBER_SEPARATOR    =  7,
        DIRECTIONALITY_NONSPACING_MARK            =  8,
        DIRECTIONALITY_BOUNDARY_NEUTRAL           =  9,

        DIRECTIONALITY_PARAGRAPH_SEPARATOR        = 10,
        DIRECTIONALITY_SEGMENT_SEPARATOR          = 11,
        DIRECTIONALITY_WHITESPACE                 = 12,
        DIRECTIONALITY_OTHER_NEUTRALS             = 13,

        DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING    = 14,
        DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE     = 15,
        DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING    = 16,
        DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE     = 17,
        DIRECTIONALITY_POP_DIRECTIONAL_FORMAT     = 18,

        // One past the last valid category; also the table length.
        DIRECTIONALITY_CATEGORY_COUNT             = 19;

    /**
     * Bidirectional category table: row index is the category constant,
     * column {@link #SHORT} is the abbreviation found in the spec file and
     * column {@link #LONG} is the constant's name.
     */
    static final String[][] bidiCategoryList = {
        {"L", "DIRECTIONALITY_LEFT_TO_RIGHT"},
        {"R", "DIRECTIONALITY_RIGHT_TO_LEFT"},
        {"AL", "DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC"},
        {"EN", "DIRECTIONALITY_EUROPEAN_NUMBER"},
        {"ES", "DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR"},
        {"ET", "DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR"},
        {"AN", "DIRECTIONALITY_ARABIC_NUMBER"},
        {"CS", "DIRECTIONALITY_COMMON_NUMBER_SEPARATOR"},
        {"NSM", "DIRECTIONALITY_NONSPACING_MARK"},
        {"BN", "DIRECTIONALITY_BOUNDARY_NEUTRAL"},
        {"B", "DIRECTIONALITY_PARAGRAPH_SEPARATOR"},
        {"S", "DIRECTIONALITY_SEGMENT_SEPARATOR"},
        {"WS", "DIRECTIONALITY_WHITESPACE"},
        {"ON", "DIRECTIONALITY_OTHER_NEUTRALS"},
        {"LRE", "DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING"},
        {"LRO", "DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE"},
        {"RLE", "DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING"},
        {"RLO", "DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE"},
        {"PDF", "DIRECTIONALITY_POP_DIRECTIONAL_FORMAT"},
    };

    /** Zero-based positions of the fields within a specification line. */
    static final byte
        FIELD_VALUE         = 0,
        FIELD_NAME          = 1,
        FIELD_CATEGORY      = 2,
        FIELD_CLASS         = 3,
        FIELD_BIDI          = 4,
        FIELD_DECOMPOSITION = 5,
        FIELD_DECIMAL       = 6,
        FIELD_DIGIT         = 7,
        FIELD_NUMERIC       = 8,
        FIELD_MIRRORED      = 9,
        FIELD_OLDNAME       = 10,
        FIELD_COMMENT       = 11,
        FIELD_UPPERCASE     = 12,
        FIELD_LOWERCASE     = 13,
        FIELD_TITLECASE     = 14;

    /** Field separator of a specification line; compiled once and reused. */
    static final Pattern tokenSeparator = Pattern.compile(";");

    /**
     * Command-line driver: reads the records of one plane from a file and
     * prints the record count followed by each record's {@link #toString()}.
     *
     * @param args {@code args[0]} is the specification file path and
     *             {@code args[1]} is the plane number (decimal)
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.err.println("Usage: UnicodeSpec <spec-file> <plane>");
            return;
        }
        try {
            File file = new File(args[0]);
            int plane = Integer.parseInt(args[1]);
            UnicodeSpec[] spec = UnicodeSpec.readSpecFile(file, plane);
            System.out.println("UnicodeSpec[" + spec.length + "]:");
            for (int x = 0; x < spec.length; x++) {
                System.out.println(spec[x].toString());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}